/*
This file contains the code to do the RD analysis in Rodgers and Hambur (2018) 'The GFC Investment Tax Break'
It is broken up into a number of individual pieces
*/

** Set up locals for the program - make changes here ****
* Both locals below are left empty in this file and must be filled in before running.
* location: folder where the TAX_CREDIT_large.dta and TAX_CREDIT_RDD_large.dta files are saved and re-loaded.
* File paths are built by appending a backslash and the file name to the folder.
local location 
* datalocation: folder holding the input files div1.dta to div19.dta appended in Section A.
local datalocation

* Set the PLUS directory to the folder holding rdrobust and rddensity
* (the directory argument is left blank here and must be supplied)
sysdir set PLUS 

*** A. Build the full database

*** MERGE DATABASES ********
* Begin with nothing in memory, then stack the 19 division files one after another
clear
forvalues i = 1(1)19 {
	append using "`datalocation'\div`i'.dta"
}

* Drop government (the original label for these filters):
* rows with x_tolo equal to 0, and rows with x_tolo above 20 whose pp is 0 or 1
drop if x_tolo == 0
drop if x_tolo > 20 & (pp == 0 | pp == 1)

* Keep only the variables likely to be used
keep id tsid cap go x_anzsic k_stock intuse hcnt bas_wages fte ///
	profit total_business_income sales_of bit_type ///
	x_state x_pcode x_tolo x_sisca08 foreign_share

***** GENERATE A FEW VARIABLES WE ARE LIKELY TO NEED *****
* Give the main variables shorter names; the two lists are matched by position
local old_names bas_wages profit_or sales_of total_business_income k_stock x_state x_pcode foreign_share
local new_names wages profit sales busincome kstock state postcode foreign
forvalues k = 1/8 {
	local from : word `k' of `old_names'
	local to : word `k' of `new_names'
	rename `from' `to'
}

* Rescale the monetary series by dividing each by one million
foreach money_var in busincome go cap intuse kstock wages sales profit {
	replace `money_var' = `money_var'/1000000
}

* The firm identifier arrives as a string: keep the original as new_id, take up to
* ten characters starting at position 3, and convert that piece to a number
rename id new_id
generate id = substr(new_id, 3, 10)
destring id, replace

* Numeric copies of the year and industry codes, then drop the source columns
generate year = tsid*1
generate industry = x_anzsic*1
drop tsid x_anzsic

* Declare the data as a firm-by-year panel
tsset id year


*Generate indicators for divisions
* division holds the letter (A to S) and div the matching number (1 to 19).
* Both are left missing when the industry code is missing or falls outside every
* range, so the letter and the number always agree. (Starting div at 1 for every
* firm would place unclassified firms in division A whenever i.div is used.)
generate division = "A" if industry < 600
generate div = 1 if industry < 600

* Divisions B to S: the k-th letter applies when industry is strictly above the
* k-th entry of div_above and strictly below the k-th entry of div_below
local div_letters B C D E F G H I J K L M N O P Q R S
local div_above 599 1099 2599 2999 3299 3899 4399 4599 5399 6199 6599 6799 7199 7499 7999 8399 8899 9399
local div_below 1100 2600 3000 3300 3900 4400 4600 5400 6200 6500 6800 7100 7400 8000 8300 8800 9300 9999

forvalues k = 1/18 {
	local letter : word `k' of `div_letters'
	local above : word `k' of `div_above'
	local below : word `k' of `div_below'
	replace division = "`letter'" if industry > `above' & industry < `below'
	* B is the 2nd division, so the number is the list position plus one
	replace div = `k' + 1 if industry > `above' & industry < `below'
}

* Divisions B and K are excluded from the analysis
drop if  division == "B" | division == "K"

* Generate a few series we might need
xtset id year

* Logs of capital stock, sales and investment, plus one-year sales growth
generate lkstock = log(kstock)
generate lsales = log(sales)
generate sale_grow = (sales-l.sales)/l.sales
generate lcap = log(cap)

* Year dummies for 2009 and 2010
generate y_09 = (year == 2009)
generate y_10 = (year == 2010)


*Generate size indicators and treatment and control indicators
* control:   sales above 2 in each of the two previous years
* treatment: sales above 2 last year but at or below 2 two years ago
* Stata treats a missing value as larger than any number, so a bare "l.sales>2"
* is true when the lag is missing; the missing() checks stop firms without
* lagged sales from being flagged.
generate control = (l.sales>2 & l2.sales>2 & !missing(l.sales, l2.sales))

generate treatment = (l.sales>2 & l2.sales<=2 & !missing(l.sales))

* keep marks the rows that belong to either group (missing otherwise)
generate keep = 1 if treatment==1 | control==1

save "`location'\TAX_CREDIT_large.dta", replace


* B. Turn the full sample into the RD sample

use "`location'\TAX_CREDIT_large.dta", clear

* Lags and leads of the level series.
* Naming: <var>1 and <var>2 are the one- and two-year lags,
* f_<var> and f2_<var> the one- and two-year leads.
foreach v in sales lcap fte hcnt {
	generate `v'1 = l.`v'
	generate `v'2 = l2.`v'
	generate f_`v' = f.`v'
	generate f2_`v' = f2.`v'
}

* Same for sales growth (the source series is sale_grow, the copies are sales_grow*)
generate sales_grow1 = l.sale_grow
generate sales_grow2 = l2.sale_grow
generate f_sales_grow = f.sale_grow
generate f2_sales_grow = f2.sale_grow

* First differences of the two employment measures with their lags and leads,
* plus d2_<var>: next year's level minus last year's level
foreach v in fte hcnt {
	generate d`v' = d.`v'
	generate d`v'1 = l.d.`v'
	generate d`v'2 = l2.d.`v'
	generate f_d`v' = f.d.`v'
	generate f2_d`v' = f2.d.`v'
	generate d2_`v' = f_`v' - `v'1
}

* Wages: leads only
generate f_wages = f.wages
generate f2_wages = f2.wages

*Generate exit indicators
* exit is 1 on a firm's last observed year when that year is before max_year
local max_year = 2015
sort id year
by id: generate exit = (_n == _N & year < `max_year')

xtset id year

generate exit1 = l.exit
generate exit2 = l2.exit
generate f_exit = f.exit
generate f2_exit = f2.exit

xtset id year

* Keep only treatment and control rows; the marker is not needed afterwards
drop if keep == .
drop keep

save "`location'\TAX_CREDIT_RDD_large.dta", replace


*C. Create discontinuity graphs such as Figures 3 and 4

use "`location'\TAX_CREDIT_RDD_large.dta", clear


** Set up locals for the discontinuity graph you want
* use_year: year plotted; ceiling: upper limit on sales1;
* lower_b and upper_b: range of sales2 shown around the cut-off at 2
local use_year = 2010
local ceiling = 2.5
local lower_b = 1.7
local upper_b = 2.3

* Drop firms whose current-year sales are below 2
drop if sales<2

* Keep only the year being graphed
drop if year != `use_year'

* Sample condition shared by all the linear fits below
local window sales2>`lower_b' & sales2<`upper_b' & sales1<`ceiling' & year == `use_year'


**Get coefficients for linear regression
* Each coefficient is stored as a constant variable so that it survives the
* collapse further down.

*Residuals from reg investment on industry
reg lcap i.industry if `window'

predict resid if year == `use_year', residuals

*Full sample, residualised investment
reg resid treatment sales2 if `window'

g cons = _b[_cons]
g t_coef = _b[treatment]
g sales_coef = _b[sales2]

*Above the cut-off (treatment == 0)
reg resid sales2 if `window' & treatment == 0

g cons_t0 = _b[_cons]
g sales_coef_t0 = _b[sales2]

*Below the cut-off (treatment == 1)
reg resid sales2 if `window' & treatment == 1

g cons_t1 = _b[_cons]
g sales_coef_t1 = _b[sales2]

*Same three fits on log investment itself (not residualised)
*Full
reg lcap treatment sales2 if `window'

g cons_l = _b[_cons]
g t_coef_l = _b[treatment]
g sales_coef_l = _b[sales2]

*Above
reg lcap sales2 if `window' & treatment == 0

g cons_l_t0 = _b[_cons]
g sales_coef_l_t0 = _b[sales2]

*Below
* treatment is constant in this subsample, so it is not included as a regressor
* (matches the residualised version above)
reg lcap sales2 if `window' & treatment == 1

g cons_l_t1 = _b[_cons]
g sales_coef_l_t1 = _b[sales2]

**** Need to put in some set of polynomial points here
* Placeholder: poly_points is all missing until evaluation points are filled in
* NOTE(review): the lpoly help says only the smoothed-values variable should be
* listed in generate() when at() is given - confirm the two-name form below runs
g poly_points = .

*Construct polynomial fit
* Local polynomial fits of degree 3 and 2 on each side of the cut-off, for
* log investment (smooth_*) and for the residual (smooth_*_res*).
* The year and ceiling come from the locals above so all fits use one sample.
foreach y in lcap resid {
	local tag
	if "`y'" == "resid" {
		local tag _res
	}
	foreach deg in 3 2 {
		lpoly `y' sales2 if year == `use_year' & sales1<`ceiling' & sales2 >2 & sales2< `upper_b', ///
			degree(`deg') generate(smooth_grid_high`tag'`deg' smooth_high`tag'`deg') at(poly_points) bw(0.05)

		lpoly `y' sales2 if year == `use_year' & sales1<`ceiling' & sales2 <2 & sales2>`lower_b', ///
			degree(`deg') generate(smooth_grid_low`tag'`deg' smooth_low`tag'`deg') at(poly_points) bw(0.05)
	}
}

*** Construct linear and average for buckets
*Collapse down to required buckets for linear and dots
* preserve and restore keep the firm-level data intact around the collapse
preserve

*Set bins
local bin = 0.02

* Lower edge of the bin that each firm's sales2 falls in
g sales2_round = `bin'*floor(sales2/`bin') if `window'
drop if sales2_round == .

collapse(mean) resid lcap cons t_coef sales_coef cons_l t_coef_l sales_coef_l cons_t0 cons_t1 sales_coef_t0 sales_coef_t1 cons_l_t0 cons_l_t1 sales_coef_l_t0 sales_coef_l_t1,  by(sales2_round)

* After the collapse only sales2_round is left from the sales2 variables, so it
* is named in full here rather than relying on variable-name abbreviation.
g fitted =  sales_coef*sales2_round+cons
replace fitted = fitted+  t_coef if sales2_round<2

g fitted_l =  sales_coef_l*sales2_round+cons_l
replace fitted_l = fitted_l+  t_coef_l if sales2_round<2

g fitted_sep = sales_coef_t0*sales2_round+cons_t0
replace  fitted_sep = sales_coef_t1*sales2_round+cons_t1 if sales2_round<2

g fitted_l_sep = sales_coef_l_t0*sales2_round+cons_l_t0
replace  fitted_l_sep = sales_coef_l_t1*sales2_round+cons_l_t1 if sales2_round<2

* Bin means (dots) against the fitted lines
twoway(dot  resid sales2_round) (dot fitted sales2_round)

twoway(dot  lcap sales2_round) (dot fitted_l sales2_round)

twoway(dot  resid sales2_round) (dot fitted_sep sales2_round)

twoway(dot  lcap sales2_round) (dot fitted_l_sep sales2_round)

restore



***** D. Number of firms by sales bucket, as in Figure 6 *****

use "`location'\TAX_CREDIT_RDD_large.dta", clear

* Drop firms whose current-year sales are below 2
drop if sales < 2

* Assign each 2010 firm with sales1 below 2.5 and sales2 strictly between 1.5
* and 2.5 to the lower edge of its sales2 bin
local bin = 0.05
generate sales2_round = floor(sales2/`bin')*`bin' if year == 2010 & sales1 < 2.5 & sales2 > 1.5 & sales2 < 2.5

* Count of non-missing id values in each bin
table sales2_round, contents(n id)





******E. Regressions such as Tables 2, 3 and 4

use "`location'\TAX_CREDIT_RDD_large.dta", clear

* Local variables to set up year, ceiling and bounds
* lower_b and upper_b put a bandwidth of .27025381 either side of the cut-off at 2
local use_year = 2010
local ceiling = 2.5
local lower_b = 2-    .27025381
local upper_b = 2+    .27025381


drop if year != `use_year'

* Drop firms whose current-year sales are below 2
drop if  sales<2


*Construct regression variables
* sales2_c is sales2 centred on the cut-off, sales2_<p>c its p-th power, and the
* matching *t variables are the interactions with treatment
generate sales2_c = sales2-2
generate sales2_t = sales2_c*treatment
forvalues p = 2/6 {
	generate sales2_`p'c = sales2_c^`p'
	generate sales2_`p't = sales2_`p'c*treatment
}

* Sample condition shared by every regression below
local rd_if sales2>`lower_b' & sales2<`upper_b' & sales1<`ceiling' & year == `use_year'


****Baseline spec - Model 1
reg lcap treatment sales2_c sales2_t if `rd_if', robust
*Dif cluster
reg lcap treatment sales2_c sales2_t if `rd_if', vce(cluster industry)
reg lcap treatment sales2_c sales2_t if `rd_if', vce(cluster division)


****Covar spec - Model 2
reg lcap treatment sales sales1 sales2_c sales2_t if `rd_if', robust


**** Specs with industry and division means - industry pos too few per industry
reg lcap treatment sales sales1 sales2_c sales2_t i.industry if `rd_if', robust
* Model 3
xi: reg lcap treatment sales sales1 sales2_c sales2_t i.division if `rd_if', robust
* And clustering
reg lcap treatment sales2_c sales2_t i.industry if `rd_if', vce(cluster industry)
xi: reg lcap treatment sales2_c sales2_t i.division if `rd_if', vce(cluster division)

** Quadratics
*Model 4
xi: reg lcap treatment sales2_c sales2_t sales2_2c sales2_2t i.division if `rd_if', robust
**Model 5
* Quad
xi: reg lcap treatment sales sales1 sales2_c sales2_t sales2_2c sales2_2t i.division if `rd_if', robust


** rdrobust - used for models 7 and 8 (change sample)
* Uses the ceiling local, like every other command in this section
rdrobust lcap sales2_c if sales1<`ceiling'

rdrobust lcap sales2_c if sales1<`ceiling', covs(sales sales1)


** Test for manipulation
rddensity sales2_c if sales1<`ceiling' & year == `use_year'


******** F: Testing for sufficient order polynomial as in Section 5.1 **********
* Adds a dummy for each 0.01-wide sales2 bin to the chosen model and jointly
* tests that all the bin dummies are zero.
use "`location'\TAX_CREDIT_RDD_large.dta", clear

* Local variables to set up year, ceiling and bounds
local use_year = 2010
local ceiling = 2.5
local lower_b = 2-    .27025381
local upper_b = 2+    .27025381

* The saved RD file does not hold the centred running variable or its treatment
* interaction, so build them here; without them the regression below stops
* with a variable-not-found error
generate sales2_c = sales2-2
generate sales2_t = sales2_c*treatment

* NOTE(review): unlike the other sections, this one does not drop firms with
* current-year sales below 2 - confirm that is intended

* Lower edge of the 0.01-wide bin each firm's sales2 falls in
g sales2_round = 0.01*floor(sales2/0.01) if sales2<`upper_b' & sales2>`lower_b' & sales1<`ceiling'

* Bounds in hundredths, rounded to whole numbers, so bins can be indexed by integer
local lower_b100 = round(`lower_b'*100,1)
local upper_b100 = round(`upper_b'*100,1)

* Dummies are made for the bins strictly inside the bounds, leaving out the
* lowest bin and the top two
local first_bin = `lower_b100' + 1
local last_bin = `upper_b100' - 2

forvalues i = `first_bin'/`last_bin' {
	* The small tolerance guards against rounding in the stored bin value
	generate d_`i' = (sales2_round > `i'/100 -.0001 & sales2_round < `i'/100 +.0001)
}

* Use model wish to test
reg lcap treatment sales2_c sales2_t d_* if sales2>`lower_b' & sales2<`upper_b' & sales1<`ceiling' & year == `use_year'
* reg lcap treatment sales sales1 sales2_c sales2_t d_* if sales2>`lower_b' & sales2<`upper_b' & sales1<`ceiling' & year == `use_year'
* reg lcap treatment i.industry sales sales1 sales2_c sales2_t d_* if sales2>`lower_b' & sales2<`upper_b' & sales1<`ceiling' & year == `use_year'
* reg lcap treatment i.div sales sales1 sales2_c sales2_t d_* if sales2>`lower_b' & sales2<`upper_b' & sales1<`ceiling' & year == `use_year'

* Joint test: start with the first dummy, then add the others one at a time
test d_`first_bin' = 0
local next_bin = `first_bin' + 1
forvalues i = `next_bin'/`last_bin' {
	test d_`i' = 0, accumulate
}

drop d_*

**** G. Regression with differing bandwidths as for Figures B1 and B2
* For bandwidths 0.05 to 1.00 in steps of 0.01, runs the model with a 1st to
* 4th order polynomial in the running variable and collects, one column per
* bandwidth:
*   B   treatment coefficients (one row per polynomial order)
*   LL  lower 95 percent confidence limits
*   UL  upper 95 percent confidence limits
*   N   number of observations in the 4th order regression
use "`location'\TAX_CREDIT_RDD_large.dta", clear

local use_year = 2010
local ceiling = 2.5

*Construct regression variables
* sales2_c is sales2 centred on the cut-off, sales2_<p>c its p-th power, and the
* matching *t variables are the interactions with treatment
generate sales2_c = sales2-2
generate sales2_t = sales2_c*treatment
forvalues p = 2/6 {
	generate sales2_`p'c = sales2_c^`p'
	generate sales2_`p't = sales2_`p'c*treatment
}

* Drop firms whose current-year sales are below 2
drop if  sales<2

* The result matrices grow with nullmat(), so anything left from an earlier run
* (or from another section using the same names) must be cleared first
foreach m in B LL UL N {
	capture matrix drop `m'
}

quietly {
	* Count in whole hundredths so that the last bandwidth (1.00) is always
	* reached; adding 0.01 repeatedly can overshoot it through rounding
	forvalues step = 5/100 {
		local i = `step'/100

		* poly_terms grows by one order each pass: linear, quadratic, cubic, quartic
		local poly_terms
		forvalues order = 1/4 {
			if `order' == 1 {
				local poly_terms sales2_c sales2_t
			}
			else {
				local poly_terms `poly_terms' sales2_`order'c sales2_`order't
			}

			reg lcap treatment i.div sales sales1 `poly_terms' if sales2> 2-`i' & sales2< 2+ `i' & sales1<`ceiling' & year == `use_year', robust

			* treatment is the first regressor, so element [1,1] of e(b) and
			* e(V) is its coefficient and variance
			matrix b_`order' = e(b)
			scalar bs_`order' = b_`order'[1,1]
			matrix v_`order' = e(V)
			scalar vs_`order' = v_`order'[1,1]
			scalar bound_`order' = sqrt(vs_`order')*invttail(e(df_r),0.025)
			scalar ll_`order' = bs_`order' - bound_`order'
			scalar ul_`order' = bs_`order' + bound_`order'
		}

		matrix bs = ( bs_1 \ bs_2 \ bs_3 \ bs_4)
		matrix lls = ( ll_1 \ ll_2 \ ll_3 \ ll_4)
		matrix uls = ( ul_1 \ ul_2 \ ul_3 \ ul_4)

		* Append this bandwidth's results as a new column
		matrix B = (nullmat(B), bs)
		matrix LL = (nullmat(LL), lls)
		matrix UL = (nullmat(UL), uls)
		matrix n = e(N)
		matrix N = (nullmat(N), e(N))
	}
}
	

	 
**** H. Regression with differing ceilings, optimal bandwidth, as for Figure B3
* For ceilings on sales1 from 2.05 to 4.00 in steps of 0.01, runs three
* specifications at the fixed bandwidth and collects, one column per ceiling:
*   B   treatment coefficients (one row per specification)
*   LL  lower 95 percent confidence limits
*   UL  upper 95 percent confidence limits
*   N   number of observations in the third specification
use "`location'\TAX_CREDIT_RDD_large.dta", clear

local use_year = 2010
local lower_b = 2-   .27025381
local upper_b = 2+   .27025381

drop if year != `use_year'

* Drop firms whose current-year sales are below 2
drop if  sales<2

*Construct regression variables
* sales2_c is sales2 centred on the cut-off, sales2_<p>c its p-th power, and the
* matching *t variables are the interactions with treatment
generate sales2_c = sales2-2
generate sales2_t = sales2_c*treatment
forvalues p = 2/6 {
	generate sales2_`p'c = sales2_c^`p'
	generate sales2_`p't = sales2_`p'c*treatment
}

* Regressors that follow treatment in each of the three specifications
local spec_1 sales1 sales sales2_c sales2_t
local spec_2 sales2_c sales2_t
local spec_3 i.div sales2_c sales2_t

* The result matrices grow with nullmat(), so anything left from an earlier run
* (or from another section using the same names) must be cleared first;
* otherwise the 3-row columns built here cannot be joined to older results
foreach m in B LL UL N {
	capture matrix drop `m'
}

quietly {
	* Count in whole hundredths so that the last ceiling (2 + 2.00) is always
	* reached; adding 0.01 repeatedly can overshoot it through rounding
	forvalues step = 5/200 {
		local i = `step'/100

		forvalues k = 1/3 {
			reg lcap treatment `spec_`k'' if sales2>`lower_b' & sales2< `upper_b' & sales1< 2+`i' & year == `use_year', robust

			* treatment is the first regressor, so element [1,1] of e(b) and
			* e(V) is its coefficient and variance
			matrix b_`k' = e(b)
			scalar bs_`k' = b_`k'[1,1]
			matrix v_`k' = e(V)
			scalar vs_`k' = v_`k'[1,1]
			scalar bound_`k' = sqrt(vs_`k')*invttail(e(df_r),0.025)
			scalar ll_`k' = bs_`k' - bound_`k'
			scalar ul_`k' = bs_`k' + bound_`k'
		}

		matrix bs = ( bs_1 \ bs_2 \ bs_3 )
		matrix lls = ( ll_1 \ ll_2 \ ll_3 )
		matrix uls = ( ul_1 \ ul_2 \ ul_3  )

		* Append this ceiling's results as a new column
		matrix B = (nullmat(B), bs)
		matrix LL = (nullmat(LL), lls)
		matrix UL = (nullmat(UL), uls)
		matrix n = e(N)
		matrix N = (nullmat(N), e(N))
	}
}
	


 
 ****** I. Plug in optimal from I&K 2012 as for Section 5.1
* Computes a plug-in bandwidth for the cut-off at sales2 = 2 in three steps and
* stores the result in the scalar h_opt.

use "`location'\TAX_CREDIT_RDD_large.dta", clear


local use_year = 2010
local ceiling = 2.5


* Restrict to the estimation sample: chosen year, sales1 not above the ceiling,
* non-missing outcome and current sales
drop if year != `use_year'
drop if sales1 > `ceiling'
drop if lcap == .
drop if sales == .
* Drop firms whose current-year sales are below 2
drop if sales <2

* Set some boundary on investment if wanted
*drop if cap >10

* sales2_c is sales2 centred on the cut-off, sales2_<p>c its p-th power, and the
* matching *t variables are the interactions with treatment
generate sales2_c = sales2-2
generate sales2_t = sales2_c*treatment
forvalues p = 2/6 {
	generate sales2_`p'c = sales2_c^`p'
	generate sales2_`p't = sales2_`p'c*treatment
}

** Step 1: Estimate density and conditional var
summarize sales2 if sales1<`ceiling' & year == `use_year'


* Density
local SD_sample = r(sd)
local N_sample = r(N)

* First pilot bandwidth from the spread of the running variable
local pilot_h = 1.84 * `SD_sample'*(`N_sample')^(-1/5)

* Count and mean outcome just below the cut-off
summarize lcap if sales1<`ceiling' & year == `use_year' & sales2< 2 & sales2 > 2-`pilot_h'

local N_low = r(N)
local mean_low = r(mean)

* Count and mean outcome just above the cut-off
summarize lcap if sales1<`ceiling' & year == `use_year' & sales2> 2 & sales2 < 2+`pilot_h'

local N_high = r(N)
local mean_high = r(mean)

* The two counts cover a window of total width 2*pilot_h around the cut-off, so
* the share of the sample in the window is divided by 2*pilot_h to get a density
local dens_c = (`N_low'+`N_high')/(2*`N_sample'*`pilot_h')


* Cond var
* Squared deviations from the side-specific means, pooled over both windows
generate demean_low = (lcap - `mean_low')^2

generate demean_high = (lcap - `mean_high')^2

summarize demean_low if sales1<`ceiling' & year == `use_year' & sales2< 2 & sales2 > 2-`pilot_h'

local sum_error_low = r(mean)*r(N)

summarize demean_high if sales1<`ceiling' & year == `use_year' & sales2> 2 & sales2 < 2+`pilot_h'

local sum_error_high = r(mean)*r(N)

local cond_var = (`sum_error_low'+`sum_error_high')/(`N_high'+`N_low')

** Step 2: Second derivs

* Medians of the running variable on each side bound the cubic fit below
summarize sales2 if sales1<`ceiling' & year == `use_year' & sales2>= 2, detail

local med_high = r(p50)

summarize sales2 if sales1<`ceiling' & year == `use_year' & sales2< 2, detail

local med_low = r(p50)

* Cubic with a jump at the cut-off; six times the cubic coefficient is the
* third derivative
reg lcap treatment sales2_c sales2_2c sales2_3c if sales1<`ceiling' & year == `use_year' & sales2 < `med_high' & sales2 > `med_low'

local m3 = 6*_b[sales2_3c]

local m3 = (`m3')^2

* Floor on the squared third derivative so the pilot bandwidths stay finite
if `m3' < 0.01 {
	local m3 = 0.01
}

* Second pilot bandwidths, one per side
* NOTE(review): these use the counts inside the first pilot window (N_high and
* N_low); the Imbens-Kalyanaraman formula appears to use all observations on
* each side of the cut-off - confirm against the paper
local pilot_h2_plus = 3.56*(`cond_var'/(`dens_c'*`m3'))^(1/7)*(`N_high')^(-1/7)

local pilot_h2_minus = 3.56*(`cond_var'/(`dens_c'*`m3'))^(1/7)*(`N_low')^(-1/7)

* Quadratic fit just above the cut-off; twice the squared-term coefficient is
* the second derivative
reg lcap sales2_c sales2_2c if sales1<`ceiling' & year == `use_year' & sales2 < 2+`pilot_h2_plus' & sales2 > 2

local m2_plus = 2*_b[sales2_2c]

local N2_plus = e(N)

* Quadratic fit just below the cut-off, using the bandwidth for that side
* (pilot_h2_minus), which is also the one that enters r_minus below
reg lcap sales2_c sales2_2c if sales1<`ceiling' & year == `use_year' & sales2 > 2-`pilot_h2_minus' & sales2 < 2

local m2_minus = 2*_b[sales2_2c]

local N2_minus = e(N)

** Step 3 Regularization terms and optimal

local r_plus = 720*`cond_var'/(`N2_plus'*(`pilot_h2_plus')^4)

local r_minus = 720*`cond_var'/(`N2_minus'*(`pilot_h2_minus')^4)

* 2*cond_var is the sum of the two side variances, which are pooled above
scalar h_opt = 3.4375*((2*`cond_var')/(`dens_c'*((`m2_plus'-`m2_minus')^2+(`r_plus'+`r_minus'))))^(1/5)*(`N_sample')^(-1/5)
